From ca479cec72cb4361e10fa663ee429205bd912ba3 Mon Sep 17 00:00:00 2001 From: "kaf24@scramble.cl.cam.ac.uk" Date: Wed, 9 Feb 2005 16:24:23 +0000 Subject: [PATCH] bitkeeper revision 1.1159.1.560 (420a3937bsia3-KUKXK3oI9TTu7gaA) Loads more x86_64 work. entry.S is now done so we reflect exceptions and interrupts to the guest. The only missing parts are new hypercalls to set FS_BASE, GS_BASE_KERN, GS_BASE_USER. Also the concept of two L4 pagetable bases -- one for user, one for kernel. Signed-off-by: keir.fraser@cl.cam.ac.uk --- xen/arch/x86/dom0_ops.c | 2 + xen/arch/x86/domain.c | 138 ++++++++++++++ xen/arch/x86/traps.c | 12 +- xen/arch/x86/x86_32/entry.S | 13 +- xen/arch/x86/x86_32/traps.c | 2 +- xen/arch/x86/x86_64/asm-offsets.c | 9 +- xen/arch/x86/x86_64/entry.S | 266 +++++++++++++++++++++++++-- xen/include/asm-x86/config.h | 6 + xen/include/asm-x86/processor.h | 56 +++--- xen/include/asm-x86/regs.h | 2 +- xen/include/asm-x86/x86_32/regs.h | 35 +--- xen/include/asm-x86/x86_64/current.h | 7 +- xen/include/asm-x86/x86_64/regs.h | 34 +--- xen/include/public/arch-x86_32.h | 48 +++-- xen/include/public/arch-x86_64.h | 63 ++++--- 15 files changed, 522 insertions(+), 171 deletions(-) diff --git a/xen/arch/x86/dom0_ops.c b/xen/arch/x86/dom0_ops.c index fdc5790220..e019a5feb7 100644 --- a/xen/arch/x86/dom0_ops.c +++ b/xen/arch/x86/dom0_ops.c @@ -355,6 +355,8 @@ void arch_getdomaininfo_ctxt( sizeof(ed->arch.user_ctxt)); if ( test_bit(EDF_DONEFPUINIT, &ed->ed_flags) ) c->flags |= ECF_I387_VALID; + if ( GUESTOS_MODE(ed, &ed->arch.user_ctxt) ) + c->flags |= ECF_IN_GUESTOS; memcpy(&c->fpu_ctxt, &ed->arch.i387, sizeof(ed->arch.i387)); diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c index 130a6df107..17ca885313 100644 --- a/xen/arch/x86/domain.c +++ b/xen/arch/x86/domain.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -286,6 +287,8 @@ void arch_do_createdomain(struct exec_domain *ed) 
d->arch.mm_perdomain_l3[l3_table_offset(PERDOMAIN_VIRT_START)] = mk_l3_pgentry(__pa(d->arch.mm_perdomain_l2) | __PAGE_HYPERVISOR); #endif + + ed->arch.flags = TF_guestos_mode; } } @@ -295,6 +298,7 @@ void arch_do_boot_vcpu(struct exec_domain *ed) ed->arch.schedule_tail = d->exec_domain[0]->arch.schedule_tail; ed->arch.perdomain_ptes = d->arch.mm_perdomain_pt + (ed->eid << PDPT_VCPU_SHIFT); + ed->arch.flags = TF_guestos_mode; } #ifdef CONFIG_VMX @@ -445,6 +449,10 @@ int arch_final_setup_guestos( if ( c->flags & ECF_I387_VALID ) set_bit(EDF_DONEFPUINIT, &d->ed_flags); + d->arch.flags &= ~TF_guestos_mode; + if ( c->flags & ECF_IN_GUESTOS ) + d->arch.flags |= TF_guestos_mode; + memcpy(&d->arch.user_ctxt, &c->cpu_ctxt, sizeof(d->arch.user_ctxt)); @@ -558,12 +566,21 @@ void switch_to(struct exec_domain *prev_p, struct exec_domain *next_p) #ifdef CONFIG_VMX unsigned long vmx_domain = next_p->arch.arch_vmx.flags; #endif +#ifdef __x86_64__ + int all_segs_okay = 1; +#endif __cli(); /* Switch guest general-register state. */ if ( !is_idle_task(prev_p->domain) ) { +#ifdef __x86_64__ + __asm__ __volatile__ ( "movl %%ds,%0" : "=m" (stack_ec->ds) ); + __asm__ __volatile__ ( "movl %%es,%0" : "=m" (stack_ec->es) ); + __asm__ __volatile__ ( "movl %%fs,%0" : "=m" (stack_ec->fs) ); + __asm__ __volatile__ ( "movl %%gs,%0" : "=m" (stack_ec->gs) ); +#endif memcpy(&prev_p->arch.user_ctxt, stack_ec, sizeof(*stack_ec)); @@ -642,6 +659,127 @@ void switch_to(struct exec_domain *prev_p, struct exec_domain *next_p) load_LDT(next_p); __sti(); + +#ifdef __x86_64__ + +#define loadsegment(seg,value) ({ \ + int __r = 1; \ + __asm__ __volatile__ ( \ + "1: movl %k1,%%" #seg "\n2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: xorl %k0,%k0\n" \ + " movl %k0,%%" #seg "\n" \ + " jmp 2b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 8\n" \ + " .quad 1b,3b\n" \ + ".previous" \ + : "=r" (__r) : "r" (value), "0" (__r) );\ + __r; }) + + /* Either selector != 0 ==> reload. 
*/ + if ( unlikely(prev_p->arch.user_ctxt.ds) || + unlikely(next_p->arch.user_ctxt.ds) ) + all_segs_okay &= loadsegment(ds, next_p->arch.user_ctxt.ds); + + /* Either selector != 0 ==> reload. */ + if ( unlikely(prev_p->arch.user_ctxt.es) || + unlikely(next_p->arch.user_ctxt.es) ) + all_segs_okay &= loadsegment(es, next_p->arch.user_ctxt.es); + + /* + * Either selector != 0 ==> reload. + * Also reload to reset FS_BASE if it was non-zero. + */ + if ( unlikely(prev_p->arch.user_ctxt.fs) || + unlikely(prev_p->arch.user_ctxt.fs_base) || + unlikely(next_p->arch.user_ctxt.fs) ) + { + all_segs_okay &= loadsegment(fs, next_p->arch.user_ctxt.fs); + if ( prev_p->arch.user_ctxt.fs ) /* != 0 selector kills fs_base */ + prev_p->arch.user_ctxt.fs_base = 0; + } + + /* + * Either selector != 0 ==> reload. + * Also reload to reset GS_BASE if it was non-zero. + */ + if ( unlikely(prev_p->arch.user_ctxt.gs) || + unlikely(prev_p->arch.user_ctxt.gs_base_os) || + unlikely(prev_p->arch.user_ctxt.gs_base_app) || + unlikely(next_p->arch.user_ctxt.gs) ) + { + /* Reset GS_BASE with user %gs. */ + all_segs_okay &= loadsegment(gs, next_p->arch.user_ctxt.gs); + /* Reset KERNEL_GS_BASE if we won't be doing it later. */ + if ( !next_p->arch.user_ctxt.gs_base_os ) + wrmsr(MSR_KERNEL_GS_BASE, 0, 0); + if ( prev_p->arch.user_ctxt.gs ) /* != 0 selector kills app gs_base */ + prev_p->arch.user_ctxt.gs_base_app = 0; + } + + /* This can only be non-zero if selector is NULL. */ + if ( next_p->arch.user_ctxt.fs_base ) + wrmsr(MSR_FS_BASE, + next_p->arch.user_ctxt.fs_base, + next_p->arch.user_ctxt.fs_base>>32); + + /* This can only be non-zero if selector is NULL. */ + if ( next_p->arch.user_ctxt.gs_base_os ) + wrmsr(MSR_KERNEL_GS_BASE, + next_p->arch.user_ctxt.gs_base_os, + next_p->arch.user_ctxt.gs_base_os>>32); + + /* This can only be non-zero if selector is NULL. 
*/ + if ( next_p->arch.user_ctxt.gs_base_app ) + wrmsr(MSR_GS_BASE, + next_p->arch.user_ctxt.gs_base_app, + next_p->arch.user_ctxt.gs_base_app>>32); + + /* If in guest-OS mode, switch the GS bases around. */ + if ( next_p->arch.flags & TF_guestos_mode ) + __asm__ __volatile__ ( "swapgs" ); + + if ( unlikely(!all_segs_okay) ) + { + unsigned long *rsp = + (next_p->arch.flags & TF_guestos_mode) ? + (unsigned long *)stack_ec->rsp : + (unsigned long *)next_p->arch.guestos_sp; + + if ( put_user(stack_ec->ss, rsp- 1) | + put_user(stack_ec->rsp, rsp- 2) | + put_user(stack_ec->rflags, rsp- 3) | + put_user(stack_ec->cs, rsp- 4) | + put_user(stack_ec->rip, rsp- 5) | + put_user(stack_ec->gs, rsp- 6) | + put_user(stack_ec->fs, rsp- 7) | + put_user(stack_ec->es, rsp- 8) | + put_user(stack_ec->ds, rsp- 9) | + put_user(stack_ec->r11, rsp-10) | + put_user(stack_ec->rcx, rsp-11) ) + { + DPRINTK("Error while creating failsafe callback frame.\n"); + domain_crash(); + } + + if ( !(next_p->arch.flags & TF_guestos_mode) ) + { + next_p->arch.flags |= TF_guestos_mode; + __asm__ __volatile__ ( "swapgs" ); + /* XXX switch page tables XXX */ + } + + stack_ec->entry_vector = TRAP_syscall; + stack_ec->rflags &= 0xFFFCBEFFUL; + stack_ec->ss = __GUEST_SS; + stack_ec->rsp = (unsigned long)(rsp-11); + stack_ec->cs = __GUEST_CS; + stack_ec->rip = next_p->arch.failsafe_address; + } + +#endif /* __x86_64__ */ } diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c index d867cd8f12..cde6911646 100644 --- a/xen/arch/x86/traps.c +++ b/xen/arch/x86/traps.c @@ -146,7 +146,7 @@ static inline int do_trap(int trapnr, char *str, DEBUGGER_trap_entry(trapnr, regs); - if ( !GUEST_FAULT(regs) ) + if ( !GUEST_MODE(regs) ) goto xen_fault; #ifndef NDEBUG @@ -217,7 +217,7 @@ asmlinkage int do_int3(struct xen_regs *regs) DEBUGGER_trap_entry(TRAP_int3, regs); - if ( !GUEST_FAULT(regs) ) + if ( !GUEST_MODE(regs) ) { DEBUGGER_trap_fatal(TRAP_int3, regs); show_registers(regs); @@ -316,7 +316,7 @@ asmlinkage int 
do_page_fault(struct xen_regs *regs) return EXCRET_fault_fixed; /* successfully copied the mapping */ } - if ( !GUEST_FAULT(regs) ) + if ( !GUEST_MODE(regs) ) goto xen_fault; #ifndef NDEBUG @@ -485,7 +485,7 @@ asmlinkage int do_general_protection(struct xen_regs *regs) if ( regs->error_code & 1 ) goto hardware_gp; - if ( !GUEST_FAULT(regs) ) + if ( !GUEST_MODE(regs) ) goto gp_in_kernel; /* @@ -522,7 +522,7 @@ asmlinkage int do_general_protection(struct xen_regs *regs) /* Emulate some simple privileged instructions when exec'ed in ring 1. */ if ( (regs->error_code == 0) && - GUESTOS_FAULT(regs) && + GUESTOS_MODE(ed, regs) && emulate_privileged_op(regs) ) return 0; @@ -685,7 +685,7 @@ asmlinkage int do_debug(struct xen_regs *regs) goto out; } - if ( !GUEST_FAULT(regs) ) + if ( !GUEST_MODE(regs) ) { /* Clear TF just for absolute sanity. */ regs->eflags &= ~EF_TF; diff --git a/xen/arch/x86/x86_32/entry.S b/xen/arch/x86/x86_32/entry.S index fd10779f5d..45fec67827 100644 --- a/xen/arch/x86/x86_32/entry.S +++ b/xen/arch/x86/x86_32/entry.S @@ -58,12 +58,13 @@ #include #include #include +#include #include -#define GET_CURRENT(reg) \ - movl $8192-4, reg; \ - orl %esp, reg; \ - andl $~3,reg; \ +#define GET_CURRENT(reg) \ + movl $STACK_SIZE-4, reg; \ + orl %esp, reg; \ + andl $~3,reg; \ movl (reg),reg; #ifdef CONFIG_VMX @@ -169,10 +170,6 @@ vmx_process_softirqs: call SYMBOL_NAME(do_softirq) jmp vmx_test_all_events #endif - -ENTRY(continue_nonidle_task) - GET_CURRENT(%ebx) - jmp test_all_events ALIGN restore_all_guest: diff --git a/xen/arch/x86/x86_32/traps.c b/xen/arch/x86/x86_32/traps.c index f0fa02181b..5f907a1cbf 100644 --- a/xen/arch/x86/x86_32/traps.c +++ b/xen/arch/x86/x86_32/traps.c @@ -88,7 +88,7 @@ void show_registers(struct xen_regs *regs) unsigned long esp; unsigned short ss, ds, es, fs, gs; - if ( GUEST_FAULT(regs) ) + if ( GUEST_MODE(regs) ) { esp = regs->esp; ss = regs->ss & 0xffff; diff --git a/xen/arch/x86/x86_64/asm-offsets.c 
b/xen/arch/x86/x86_64/asm-offsets.c index 2dd6055f0a..27d9e4d857 100644 --- a/xen/arch/x86/x86_64/asm-offsets.c +++ b/xen/arch/x86/x86_64/asm-offsets.c @@ -37,6 +37,8 @@ void __dummy__(void) OFFSET(XREGS_eflags, struct xen_regs, eflags); OFFSET(XREGS_rsp, struct xen_regs, rsp); OFFSET(XREGS_ss, struct xen_regs, ss); + OFFSET(XREGS_kernel_sizeof, struct xen_regs, es); + DEFINE(XREGS_user_sizeof, sizeof(struct xen_regs)); BLANK(); OFFSET(EDOMAIN_processor, struct exec_domain, processor); @@ -47,12 +49,11 @@ void __dummy__(void) OFFSET(EDOMAIN_failsafe_addr, struct exec_domain, arch.failsafe_address); OFFSET(EDOMAIN_trap_bounce, struct exec_domain, arch.trap_bounce); OFFSET(EDOMAIN_thread_flags, struct exec_domain, arch.flags); + OFFSET(EDOMAIN_guestos_sp, struct exec_domain, arch.guestos_sp); BLANK(); - OFFSET(SHINFO_upcall_pending, shared_info_t, - vcpu_data[0].evtchn_upcall_pending); - OFFSET(SHINFO_upcall_mask, shared_info_t, - vcpu_data[0].evtchn_upcall_mask); + OFFSET(VCPUINFO_upcall_pending, vcpu_info_t, evtchn_upcall_pending); + OFFSET(VCPUINFO_upcall_mask, vcpu_info_t, evtchn_upcall_mask); BLANK(); OFFSET(TRAPBOUNCE_error_code, struct trap_bounce, error_code); diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S index 132d8adad3..4ee4e648ed 100644 --- a/xen/arch/x86/x86_64/entry.S +++ b/xen/arch/x86/x86_64/entry.S @@ -9,8 +9,86 @@ #include #include #include +#include #include +#define GET_CURRENT(reg) \ + movq $STACK_SIZE-8, reg; \ + orq %rsp, reg; \ + andq $~7,reg; \ + movq (reg),reg; + + ALIGN +restore_all_guest: + testb $TF_failsafe_return,EDOMAIN_thread_flags(%rbx) + jnz failsafe_callback + RESTORE_ALL + testw $TRAP_syscall,4(%rsp) + jz 1f + addq $8,%rsp + popq %rcx + addq $8,%rsp + popq %r11 + popq %rsp + sysretq +1: addq $8,%rsp +FLT1: iretq +.section .fixup,"ax" +FIX1: popq -15*8-8(%rsp) # error_code/entry_vector + SAVE_ALL # 15*8 bytes pushed + movq -8(%rsp),%rsi # error_code/entry_vector + sti # after stack abuse (-1024(%rsp)) + 
pushq $__HYPERVISOR_DS # SS + leaq 8(%rsp),%rax + pushq %rax # RSP + pushf # RFLAGS + pushq $__HYPERVISOR_CS # CS + leaq DBLFLT1(%rip),%rax + pushq %rax # RIP + pushq %rsi # error_code/entry_vector + jmp error_code +DBLFLT1:GET_CURRENT(%rbx) + jmp test_all_events +DBLFIX1:GET_CURRENT(%rbx) + testb $TF_failsafe_return,EDOMAIN_thread_flags(%rbx) + jnz domain_crash # cannot reenter failsafe code + orb $TF_failsafe_return,EDOMAIN_thread_flags(%rbx) + jmp test_all_events # will return via failsafe code +.previous +.section __pre_ex_table,"a" + .quad FLT1,FIX1 +.previous +.section __ex_table,"a" + .quad DBLFLT1,DBLFIX1 +.previous + +/* No special register assumptions */ +failsafe_callback: + GET_CURRENT(%rbx) + andb $~TF_failsafe_return,EDOMAIN_thread_flags(%rbx) + leaq EDOMAIN_trap_bounce(%rbx),%rdx + movq EDOMAIN_failsafe_addr(%rbx),%rax + movq %rax,TRAPBOUNCE_eip(%rdx) + movw $TBF_FAILSAFE,TRAPBOUNCE_flags(%rdx) + call create_bounce_frame + RESTORE_ALL + addq $8,%rsp +FLT2: iret +.section .fixup,"ax" +FIX2: pushq %rbx + GET_CURRENT(%rbx) + orb $TF_failsafe_return,EDOMAIN_thread_flags(%rbx) + popq %rbx + jmp FIX1 +.section __pre_ex_table,"a" + .quad FLT2,FIX2 +.previous + + ALIGN +restore_all_xen: + RESTORE_ALL + addq $8,%rsp + iretq /* * %rax = hypercall vector @@ -18,6 +96,7 @@ * %r11, %rcx = SYSCALL-saved %rflags and %rip * NB. We must move %r10 to %rcx for C function-calling ABI. 
*/ + ALIGN ENTRY(hypercall) sti movl $__GUEST_SS,8(%rsp) @@ -25,35 +104,194 @@ ENTRY(hypercall) pushq $__GUEST_CS pushq %rcx pushq $0 + movl $TRAP_syscall,4(%rsp) SAVE_ALL movq %r10,%rcx andq $(NR_hypercalls-1),%rax leaq SYMBOL_NAME(hypercall_table)(%rip),%rbx callq *(%rbx,%rax,8) - RESTORE_ALL - addq $8,%rsp - popq %rcx - addq $8,%rsp - popq %r11 - cli - popq %rsp - sysretq - + GET_CURRENT(%rbx) + +ret_from_hypercall: + movq %rax,XREGS_rax(%rsp) # save the return value + +test_all_events: + cli # tests must not race interrupts +/*test_softirqs:*/ + movl EDOMAIN_processor(%rbx),%eax + shl $6,%rax # sizeof(irq_cpustat) == 64 + leaq SYMBOL_NAME(irq_stat)(%rip),%rcx + testl $~0,(%rcx,%rax,1) + jnz process_softirqs +/*test_guest_events:*/ + movq EDOMAIN_vcpu_info(%rbx),%rax + testb $0xFF,VCPUINFO_upcall_mask(%rax) + jnz restore_all_guest + testb $0xFF,VCPUINFO_upcall_pending(%rax) + jz restore_all_guest +/*process_guest_events:*/ + leaq EDOMAIN_trap_bounce(%rbx),%rdx + movq EDOMAIN_event_addr(%rbx),%rax + movq %rax,TRAPBOUNCE_eip(%rdx) + movw $TBF_INTERRUPT,TRAPBOUNCE_flags(%rdx) + call create_bounce_frame + movq EDOMAIN_vcpu_info(%rbx),%rax + movb $1,VCPUINFO_upcall_mask(%rax) # Upcalls masked during delivery + jmp restore_all_guest + + ALIGN +process_softirqs: + sti + call SYMBOL_NAME(do_softirq) + jmp test_all_events + +/* CREATE A BASIC EXCEPTION FRAME ON GUEST OS STACK: */ +/* { RCX, R11, [DS-GS,] [CR2,] [ERRCODE,] RIP, CS, RFLAGS, RSP, SS } */ +/* %rdx == trap_bounce, %rbx == task_struct */ +/* %rax,%rcx are clobbered. %rsi contains new XREGS_rsp. */ +create_bounce_frame: + /* Push new frame at existing %rsp if already in guest-OS mode. */ + movq XREGS_rsp+8(%rsp),%rsi + testb $TF_guestos_mode,EDOMAIN_thread_flags(%rbx) + jnz 1f + /* Push new frame at registered guest-OS stack top. */ + movq EDOMAIN_guestos_sp(%rbx),%rsi +1: movq $HYPERVISOR_VIRT_START,%rax + cmpq %rax,%rsi + jb 1f # In +ve address space? Then okay. 
+ movq $HYPERVISOR_VIRT_END+60,%rax + cmpq %rax,%rsi + jb domain_crash # Above Xen private area? Then okay. +1: subq $40,%rsi + movq XREGS_ss+8(%rsp),%rax +FLT3: movq %rax,32(%rsi) # SS + movq XREGS_rsp+8(%rsp),%rax +FLT4: movq %rax,24(%rsi) # RSP + movq XREGS_eflags+8(%rsp),%rax +FLT5: movq %rax,16(%rsi) # RFLAGS + movq XREGS_cs+8(%rsp),%rax +FLT6: movq %rax,8(%rsi) # CS + movq XREGS_rip+8(%rsp),%rax +FLT7: movq %rax,(%rsi) # RIP + movb TRAPBOUNCE_flags(%rdx),%cl + testb $TBF_EXCEPTION_ERRCODE,%cl + jz 1f + subq $8,%rsi + movq TRAPBOUNCE_error_code(%rdx),%rax +FLT8: movq %rax,(%rsi) # ERROR CODE + testb $TBF_EXCEPTION_CR2,%cl + jz 2f + subq $8,%rsi + movq TRAPBOUNCE_cr2(%rdx),%rax +FLT9: movq %rax,(%rsi) # CR2 +1: testb $TBF_FAILSAFE,%cl + jz 2f + subq $32,%rsi + movl %gs,%eax +FLT10: movq %rax,24(%rsi) # GS + movl %fs,%eax +FLT11: movq %rax,16(%rsi) # FS + movl %es,%eax +FLT12: movq %rax,8(%rsi) # ES + movl %ds,%eax +FLT13: movq %rax,(%rsi) # DS +2: subq $16,%rsi + movq XREGS_r11+8(%rsp),%rax +FLT14: movq %rax,(%rsi) # R11 + movq XREGS_rcx+8(%rsp),%rax +FLT15: movq %rax,(%rsi) # RCX + /* Rewrite our stack frame and return to guest-OS mode. */ + /* IA32 Ref. Vol. 3: TF, VM, RF and NT flags are cleared on trap. 
*/ + movb $0,TRAPBOUNCE_flags(%rdx) + testb $TF_guestos_mode,EDOMAIN_thread_flags(%rbx) + jnz 1f + orb $TF_guestos_mode,EDOMAIN_thread_flags(%rbx) + swapgs + /* XXX switch page tables XXX */ +1: movl $TRAP_syscall,XREGS_entry_vector+8(%rsp) + andl $0xfffcbeff,XREGS_eflags+8(%rsp) + movl $__GUEST_SS,XREGS_ss+8(%rsp) + movq %rsi,XREGS_rsp+8(%rsp) + movl $__GUEST_CS,XREGS_cs+8(%rsp) + movq TRAPBOUNCE_eip(%rdx),%rax + movq %rax,XREGS_rip+8(%rsp) + ret +.section .fixup,"ax" +FIX3: sti + popq %rsi + addq $8,%rsp # Discard create_b_frame return address + pushq $__HYPERVISOR_DS # SS + leaq 8(%rsp),%rax + pushq %rax # RSP + pushf # RFLAGS + pushq $__HYPERVISOR_CS # CS + leaq DBLFLT2(%rip),%rax + pushq %rax # RIP + pushq %rsi # error_code/entry_vector + jmp error_code +DBLFLT2:jmp process_guest_exception_and_events +.previous +.section __pre_ex_table,"a" + .quad FLT3,FIX3 , FLT4,FIX3 , FLT5,FIX3 , FLT6,FIX3 + .quad FLT7,FIX3 , FLT8,FIX3 , FLT9,FIX3 , FLT10,FIX3 + .quad FLT11,FIX3 , FLT12,FIX3 , FLT13,FIX3 , FLT14,FIX3 , FLT15,FIX3 +.previous +.section __ex_table,"a" + .quad DBLFLT2,domain_crash +.previous + + ALIGN +process_guest_exception_and_events: + leaq EDOMAIN_trap_bounce(%rbx),%rdx + testb $TBF_EXCEPTION,TRAPBOUNCE_flags(%rdx) + jz test_all_events + cli # create_bounce_frame needs CLI for pre-exceptions to work + call create_bounce_frame + jmp test_all_events + + ALIGN ENTRY(ret_from_intr) -restore_all_xen: - RESTORE_ALL - addq $8,%rsp - iretq + GET_CURRENT(%rbx) + testb $3,XREGS_cs(%rsp) + jnz test_all_events + jmp restore_all_xen + ALIGN error_code: SAVE_ALL - sti + testb $X86_EFLAGS_IF>>8,XREGS_eflags+1(%rsp) + jz exception_with_ints_disabled +1: sti movq %rsp,%rdi movl XREGS_entry_vector(%rsp),%eax leaq SYMBOL_NAME(exception_table)(%rip),%rdx callq *(%rdx,%rax,8) jmp restore_all_xen +exception_with_ints_disabled: + testb $3,XREGS_cs(%rsp) # interrupts disabled outside Xen? + jnz 1b # it really does happen! 
+ # (e.g., DOM0 X server) + movq XREGS_rip(%rsp),%rdi + call search_pre_exception_table + testq %rax,%rax # no fixup code for faulting EIP? + jz FATAL_exception_with_ints_disabled + movq %rax,XREGS_rip(%rsp) + movq %rsp,%rsi + subq $8,%rsp + movq %rsp,%rdi + movq $XREGS_kernel_sizeof/8,%rcx + rep; movsq # make room for error_code/entry_vector + movq XREGS_error_code(%rsp),%rax # error_code/entry_vector + movq %rax,XREGS_kernel_sizeof(%rsp) + jmp restore_all_xen # return to fixup code + +FATAL_exception_with_ints_disabled: + movl XREGS_entry_vector(%rsp),%edi + movq %rsp,%rsi + call SYMBOL_NAME(fatal_trap) + ud2 + ENTRY(divide_error) pushq $0 movl $TRAP_divide_error,4(%rsp) diff --git a/xen/include/asm-x86/config.h b/xen/include/asm-x86/config.h index 69563b8264..7e4c48e9d1 100644 --- a/xen/include/asm-x86/config.h +++ b/xen/include/asm-x86/config.h @@ -111,10 +111,16 @@ extern void __out_of_line_bug(int line) __attribute__((noreturn)); #define XENHEAP_DEFAULT_MB (16) #define PML4_ENTRY_BITS 39 +#ifndef __ASSEMBLY__ #define PML4_ENTRY_BYTES (1UL << PML4_ENTRY_BITS) #define PML4_ADDR(_slot) \ ((((_slot ## UL) >> 8) * 0xffff000000000000UL) | \ (_slot ## UL << PML4_ENTRY_BITS)) +#else +#define PML4_ENTRY_BYTES (1 << PML4_ENTRY_BITS) +#define PML4_ADDR(_slot) \ + (((_slot >> 8) * 0xffff000000000000) | (_slot << PML4_ENTRY_BITS)) +#endif /* * Memory layout: diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h index abbb21e3cf..e462f5bb97 100644 --- a/xen/include/asm-x86/processor.h +++ b/xen/include/asm-x86/processor.h @@ -88,27 +88,30 @@ /* * Trap/fault mnemonics. 
*/ -#define TRAP_divide_error 0 -#define TRAP_debug 1 -#define TRAP_nmi 2 -#define TRAP_int3 3 -#define TRAP_overflow 4 -#define TRAP_bounds 5 -#define TRAP_invalid_op 6 -#define TRAP_no_device 7 -#define TRAP_double_fault 8 -#define TRAP_copro_seg 9 -#define TRAP_invalid_tss 10 -#define TRAP_no_segment 11 -#define TRAP_stack_error 12 -#define TRAP_gp_fault 13 -#define TRAP_page_fault 14 -#define TRAP_spurious_int 15 -#define TRAP_copro_error 16 -#define TRAP_alignment_check 17 -#define TRAP_machine_check 18 -#define TRAP_simd_error 19 -#define TRAP_deferred_nmi 31 +#define TRAP_divide_error 0 +#define TRAP_debug 1 +#define TRAP_nmi 2 +#define TRAP_int3 3 +#define TRAP_overflow 4 +#define TRAP_bounds 5 +#define TRAP_invalid_op 6 +#define TRAP_no_device 7 +#define TRAP_double_fault 8 +#define TRAP_copro_seg 9 +#define TRAP_invalid_tss 10 +#define TRAP_no_segment 11 +#define TRAP_stack_error 12 +#define TRAP_gp_fault 13 +#define TRAP_page_fault 14 +#define TRAP_spurious_int 15 +#define TRAP_copro_error 16 +#define TRAP_alignment_check 17 +#define TRAP_machine_check 18 +#define TRAP_simd_error 19 +#define TRAP_deferred_nmi 31 + +/* Set for entry via SYSCALL. Informs return code to use SYSRETQ not IRETQ. */ +#define TRAP_syscall 256 /* NB. Same as ECF_IN_SYSCALL */ /* * Non-fatal fault/trap handlers return an error code to the caller. If the @@ -119,19 +122,16 @@ #define EXCRET_not_a_fault 1 /* It was a trap. No instruction replay needed. */ #define EXCRET_fault_fixed 1 /* It was fault that we fixed: try a replay. */ -/* - * 'trap_bounce' flags values. - */ +/* 'trap_bounce' flags values */ #define TBF_EXCEPTION 1 #define TBF_EXCEPTION_ERRCODE 2 #define TBF_EXCEPTION_CR2 4 #define TBF_INTERRUPT 8 #define TBF_FAILSAFE 16 -/* - * thread.flags values. 
- */ -#define TF_failsafe_return 1 +/* 'arch_exec_domain' flags values */ +#define TF_failsafe_return 1 +#define TF_guestos_mode 2 #ifndef __ASSEMBLY__ diff --git a/xen/include/asm-x86/regs.h b/xen/include/asm-x86/regs.h index 3a9f5edb02..2f7528f327 100644 --- a/xen/include/asm-x86/regs.h +++ b/xen/include/asm-x86/regs.h @@ -31,6 +31,6 @@ enum EFLAGS { EF_ID = 0x00200000, /* id */ }; -#define GUEST_FAULT(_r) (likely(VM86_MODE(_r) || !RING_0(_r))) +#define GUEST_MODE(_r) (likely(VM86_MODE(_r) || !RING_0(_r))) #endif /* __X86_REGS_H__ */ diff --git a/xen/include/asm-x86/x86_32/regs.h b/xen/include/asm-x86/x86_32/regs.h index ca05876ac6..943a982b07 100644 --- a/xen/include/asm-x86/x86_32/regs.h +++ b/xen/include/asm-x86/x86_32/regs.h @@ -1,37 +1,8 @@ #ifndef _I386_REGS_H #define _I386_REGS_H -#include - -/* So that we can use 'l' modifier in printf-style format strings. */ -#define u32 unsigned long - -struct xen_regs -{ - /* All saved activations contain the following fields. */ - u32 ebx; - u32 ecx; - u32 edx; - u32 esi; - u32 edi; - u32 ebp; - u32 eax; - u16 error_code; - u16 entry_vector; - u32 eip; - u32 cs; - u32 eflags; - - /* Only saved guest activations contain the following fields. 
*/ - u32 esp; - u32 ss; - u32 es; - u32 ds; - u32 fs; - u32 gs; -} __attribute__ ((packed)); - -#undef u32 +#include +#include #define VM86_MODE(_r) ((_r)->eflags & EF_VM) #define RING_0(_r) (((_r)->cs & 3) == 0) @@ -39,6 +10,6 @@ struct xen_regs #define RING_2(_r) (((_r)->cs & 3) == 2) #define RING_3(_r) (((_r)->cs & 3) == 3) -#define GUESTOS_FAULT(_r) (!VM86_MODE(_r) && RING_1(_r)) +#define GUESTOS_MODE(_e, _r) (!VM86_MODE(_r) && RING_1(_r)) #endif diff --git a/xen/include/asm-x86/x86_64/current.h b/xen/include/asm-x86/x86_64/current.h index 0442f2db0f..efa170f775 100644 --- a/xen/include/asm-x86/x86_64/current.h +++ b/xen/include/asm-x86/x86_64/current.h @@ -33,11 +33,16 @@ static inline execution_context_t *get_execution_context(void) return execution_context; } +/* + * Get the top-of-stack, as stored in the per-CPU TSS. This is actually + * 64 bytes below the real top of the stack to allow space for: + * domain pointer, DS, ES, FS, GS, FS_BASE, GS_BASE_OS, GS_BASE_APP + */ static inline unsigned long get_stack_top(void) { unsigned long p; __asm__ ( "orq %%rsp,%0; andq $~7,%0" - : "=r" (p) : "0" (STACK_SIZE-8) ); + : "=r" (p) : "0" (STACK_SIZE-64) ); return p; } diff --git a/xen/include/asm-x86/x86_64/regs.h b/xen/include/asm-x86/x86_64/regs.h index 6446c0c7f1..cf63ec49c4 100644 --- a/xen/include/asm-x86/x86_64/regs.h +++ b/xen/include/asm-x86/x86_64/regs.h @@ -1,41 +1,15 @@ #ifndef _X86_64_REGS_H #define _X86_64_REGS_H -#include +#include +#include -struct xen_regs -{ - u64 r15; - u64 r14; - u64 r13; - u64 r12; - union { u64 rbp; u64 ebp; } __attribute__ ((packed)); - union { u64 rbx; u64 ebx; } __attribute__ ((packed)); - /* NB. Above here is C callee-saves. 
*/ - u64 r11; - u64 r10; - u64 r9; - u64 r8; - union { u64 rax; u64 eax; } __attribute__ ((packed)); - union { u64 rcx; u64 ecx; } __attribute__ ((packed)); - union { u64 rdx; u64 edx; } __attribute__ ((packed)); - union { u64 rsi; u64 esi; } __attribute__ ((packed)); - union { u64 rdi; u64 edi; } __attribute__ ((packed)); - u32 error_code; - u32 entry_vector; - union { u64 rip; u64 eip; } __attribute__ ((packed)); - u64 cs; - union { u64 rflags; u64 eflags; } __attribute__ ((packed)); - union { u64 rsp; u64 esp; } __attribute__ ((packed)); - u64 ss; -} __attribute__ ((packed)); - -#define VM86_MODE(_r) ((_r)->eflags & EF_VM) +#define VM86_MODE(_r) (0) /* No VM86 support in long mode. */ #define RING_0(_r) (((_r)->cs & 3) == 0) #define RING_1(_r) (((_r)->cs & 3) == 1) #define RING_2(_r) (((_r)->cs & 3) == 2) #define RING_3(_r) (((_r)->cs & 3) == 3) -#define GUESTOS_FAULT(_r) (!VM86_MODE(_r) && RING_3(_r)) +#define GUESTOS_MODE(_e, _r) ((_e)->arch.flags & TF_guestos_mode) #endif diff --git a/xen/include/public/arch-x86_32.h b/xen/include/public/arch-x86_32.h index cb2af27582..9caffe3e35 100644 --- a/xen/include/public/arch-x86_32.h +++ b/xen/include/public/arch-x86_32.h @@ -94,27 +94,36 @@ typedef struct { memory_t address; /* 4: code address */ } PACKED trap_info_t; /* 8 bytes */ -typedef struct +/* So that we can use 'l' modifier in printf-style format strings. 
*/ +#define u32 unsigned long + +typedef struct xen_regs { - unsigned long ebx; - unsigned long ecx; - unsigned long edx; - unsigned long esi; - unsigned long edi; - unsigned long ebp; - unsigned long eax; - unsigned long _unused; - unsigned long eip; - unsigned long cs; - unsigned long eflags; - unsigned long esp; - unsigned long ss; - unsigned long es; - unsigned long ds; - unsigned long fs; - unsigned long gs; + u32 ebx; + u32 ecx; + u32 edx; + u32 esi; + u32 edi; + u32 ebp; + u32 eax; + u16 error_code; /* private */ + union { + u16 entry_vector; /* private */ + u16 flags; + } PACKED; + u32 eip; + u32 cs; + u32 eflags; + u32 esp; + u32 ss; + u32 es; + u32 ds; + u32 fs; + u32 gs; } PACKED execution_context_t; +#undef u32 + typedef u64 tsc_timestamp_t; /* RDTSC timestamp */ /* @@ -123,7 +132,8 @@ typedef u64 tsc_timestamp_t; /* RDTSC timestamp */ */ typedef struct { #define ECF_I387_VALID (1<<0) -#define ECF_VMX_GUEST (2<<0) +#define ECF_VMX_GUEST (1<<1) +#define ECF_IN_GUESTOS (1<<2) unsigned long flags; execution_context_t cpu_ctxt; /* User-level CPU registers */ char fpu_ctxt[256]; /* User-level FPU registers */ diff --git a/xen/include/public/arch-x86_64.h b/xen/include/public/arch-x86_64.h index 530e2431d1..2f37fea6c0 100644 --- a/xen/include/public/arch-x86_64.h +++ b/xen/include/public/arch-x86_64.h @@ -103,33 +103,41 @@ typedef struct { memory_t address; /* 8: code address */ } PACKED trap_info_t; /* 16 bytes */ -typedef struct +typedef struct xen_regs { - unsigned long r15; - unsigned long r14; - unsigned long r13; - unsigned long r12; - union { unsigned long rbp, ebp; } PACKED; - union { unsigned long rbx, ebx; } PACKED; - unsigned long r11; - unsigned long r10; - unsigned long r9; - unsigned long r8; - union { unsigned long rax, eax; } PACKED; - union { unsigned long rcx, ecx; } PACKED; - union { unsigned long rdx, edx; } PACKED; - union { unsigned long rsi, esi; } PACKED; - union { unsigned long rdi, edi; } PACKED; - unsigned long _unused; - union { 
unsigned long rip, eip; } PACKED; - unsigned long cs; - union { unsigned long rflags, eflags; } PACKED; - union { unsigned long rsp, esp; } PACKED; - unsigned long ss; - unsigned long es; - unsigned long ds; - unsigned long fs; - unsigned long gs; + u64 r15; + u64 r14; + u64 r13; + u64 r12; + union { u64 rbp, ebp; } PACKED; + union { u64 rbx, ebx; } PACKED; + u64 r11; + u64 r10; + u64 r9; + u64 r8; + union { u64 rax, eax; } PACKED; + union { u64 rcx, ecx; } PACKED; + union { u64 rdx, edx; } PACKED; + union { u64 rsi, esi; } PACKED; + union { u64 rdi, edi; } PACKED; + u32 error_code; /* private */ + union { + u32 entry_vector; /* private */ +#define ECF_IN_SYSCALL (1<<8) /* Guest synchronously interrupted by SYSCALL? */ + u32 flags; + } PACKED; + union { u64 rip, eip; } PACKED; + u64 cs; + union { u64 rflags, eflags; } PACKED; + union { u64 rsp, esp; } PACKED; + u64 ss; + u64 es; + u64 ds; + u64 fs; /* Non-zero => takes precedence over fs_base. */ + u64 gs; /* Non-zero => takes precedence over gs_base_app. */ + u64 fs_base; + u64 gs_base_os; + u64 gs_base_app; } PACKED execution_context_t; typedef u64 tsc_timestamp_t; /* RDTSC timestamp */ @@ -140,7 +148,8 @@ typedef u64 tsc_timestamp_t; /* RDTSC timestamp */ */ typedef struct { #define ECF_I387_VALID (1<<0) -#define ECF_VMX_GUEST (2<<0) +#define ECF_VMX_GUEST (1<<1) +#define ECF_IN_GUESTOS (1<<2) unsigned long flags; execution_context_t cpu_ctxt; /* User-level CPU registers */ char fpu_ctxt[512]; /* User-level FPU registers */ -- 2.30.2